library(ggplot2)
library(RRreg)
library(ggpubr)
library(boot)
library(stargazer)
library(estimatr)

rm(list = ls())

#####################################################################################################################
# Replication code for "Messaging About Corruption: The Power of Social Norms", Governance
# Author: Mattias Agerberg, University of Gothenburg, mattias.agerberg@gu.se
# MAIN MANUSCRIPT + APPENDIX

#####################################################################################################################
# STUDY 1

#####################################################################################################################
# Data, study 1
data1 <- read.csv("...study1-data.csv") 

#####################################################################################################################
# WD
setwd("") 

#####################################################################################################################
# Estimated share: never justified
#####################################################################################################################

# RR
summary(RRuni(crosswise.bribe, data = data1[is.na(data1$crosswise.bribe)==F,], p = 0.25, model = "Crosswise"))

# Direct
mean(data1$justifieddirect, na.rm = T)

#####################################################################################################################
# FIGURE 1: beliefs about others, study 1
#####################################################################################################################

all <- ggplot(data1, aes(x=bribeothers)) +
  geom_histogram(aes(y = (..count..)/sum(..count..)), colour="black", fill="grey20", alpha = 0.75, binwidth = 10) + 
  labs(x="", y="", title = "All respondents (N = 1302)") +
  ylim(0,0.265) +
  theme_minimal(base_size = 7)

uncertain <- ggplot(data1[as.numeric(data1$certain)<3,], aes(x=bribeothers)) +
  geom_histogram(aes(y = (..count..)/sum(..count..)), colour="black", fill="grey20", alpha = 0.75, binwidth = 10) + 
  labs(x="", y="", title = "Uncertain respondents (N = 785)") +
  ylim(0,0.265) +
  theme_minimal(base_size = 7)

certain <- ggplot(data1[as.numeric(data1$certain)>=3,], aes(x=bribeothers)) +
  geom_histogram(aes(y = (..count..)/sum(..count..)), colour="black", fill="grey20", alpha = 0.75, binwidth = 10) + 
  labs(x="", y="", title = "Certain respondents (N = 519)") +
  ylim(0,0.265) +
  theme_minimal(base_size = 7)

# Combine graphs
list.combined <- ggarrange(all, uncertain, certain,
                           ncol = 3, nrow = 1, common.legend = TRUE, legend = "bottom")
list.combined <- annotate_figure(list.combined, 
                                 left = text_grob("Density",size = 8,rot = 90,hjust = 0.05,vjust = 1),
     bottom = text_grob("Believed share of other people saying that accepting a bribe is never justifiable",
                        size = 8))
ggsave("figure1.tiff",width=16,height=5, units = "cm", device = "tiff", dpi = 800)

# Mean values
mean(data1$bribeothers, na.rm = T)
sd(data1$bribeothers, na.rm = T)/sqrt(length(data1$bribeothers[!is.na(data1$bribeothers)]))
sd(data1$bribeothers, na.rm = T)

mean(data1$bribeothers[as.numeric(data1$certain)>=3], na.rm = T)
mean(data1$bribeothers[as.numeric(data1$certain)<3], na.rm = T)

#####################################################################################################################
# Figure 1, Appendix D: trust in others 
#####################################################################################################################

ggplot(data1, aes(x = factor(high.trust, 
                             levels = c("Disagree strongly", "Disagree", "Neither agree nor disagree",
                                        "Agree", "Agree strongly"), 
                             labels = c("Disagree \n strongly", "Disagree", "Neither agree \n nor disagree",
                                        "Agree", "Agree \n strongly")))) +  
  geom_bar(aes(y = (..count..)/sum(..count..)), colour="black", fill="grey20", alpha = 0.75) +
  theme_minimal(base_size = 15) +
  labs(x="", y="Share", title = "Most people can be trusted") +
  theme(axis.text.x = element_text(vjust = 1))
ggsave("trust.pdf")

#####################################################################################################################
# Other
#####################################################################################################################
# Table 3, Appendix D: descriptives (sample)
descriptives <- data1[,c("female","university.ed","large.city","age")]
descriptives$age[descriptives$age>100] <- NA
stargazer(descriptives, summary.stat = c("mean","sd","min","max","n"), 
          covariate.labels = c("Female respondent","University degree",
                               "Large city (over 1,000 000 inhabitants)",
                               "Age"),
          digits = 2,
          title = "Sample characteristics, study 1", 
          notes = "Note: The question about `household income' was not asked in study 1", 
          notes.append = TRUE)

median(descriptives$house.income, na.rm = TRUE)
median(descriptives$age, na.rm = TRUE)
IQR(descriptives$house.income, na.rm = TRUE)

#####################################################################################################################
# Heterogeneity and generalizability
#####################################################################################################################
# Table 5 + heterogeneity analysis, Appendix D
het <- data1[,c("female","university.ed","large.city","age","crosswise.bribe","justifieddirect")]
het$agesq <- (het$age-18)^2

bribe <- lm(justifieddirect~female+university.ed+large.city+I(age-18)+agesq, 
            data = het)

summary(bribe)

stargazer(bribe, title="Predicting bribe justification",
          align=TRUE, dep.var.labels.include = TRUE, 
          dep.var.labels = c("Bribe not justified"),
          omit.stat=c("LL","ser","f","rsq","aic"), no.space=TRUE,
          covariate.labels = c("Female", "University ed.", "Large city", "Age", "Age$^2$"),
          notes = "The model was estimated with OLS. Robust standard errors shown in parentheses. The covariate 
          `Large city' indicates whether respondents reported that they live in a city with more than 1,000 000 
          inhabitants. $^{*}$p$<$0.05; $^{**}$p$<$0.01; $^{***}$p$<$0.001.", 
          notes.append = FALSE, notes.align = "l", column.sep.width = "10", digits = 2,
          star.cutoffs = c(0.05,0.01,0.001), font.size = "footnotesize", se =starprep(bribe))

het <- data1[,c("female","university.ed","large.city","age","crosswise.bribe","justifieddirect")]
het$agesq <- (het$age)^2

bribe <- lm(justifieddirect~female+university.ed+large.city+age+agesq, 
            data = het)

new.frame <- data.frame(female = c(1,1), university.ed = c(1,0), large.city = c(1,0),
                        age = c(25,60), agesq = c(25^2, 60^2))

predict(bribe, newdata = new.frame)